Początkowe ustawienia

Css

/* Button styling */
.my_button {
  width: 70px;
  padding: 10px;
  border: none;
  cursor: pointer;
  font-size: 14px;
  color: #3c2a34;
  background-color: #caa8b1;
}

/* All heading levels: centred, light text */
h1, h2, h3, h4, h5, h6 {
  color: #e0cdd6;
  text-align: center;
}

/* Dark page background */
body {
  background-color: #3c2a34;
}

/* Tables: horizontally centred, light text */
table {
  color: #e0cdd6;
  margin: 1em auto;
}

Definiowanie ścieżek

# Root directory of the project; every other path below is derived from it.
project_path <- "D:/moje/projekty/chess-in-the-digital-age"
presentation_path <- file.path(project_path, "presentation")

# Kept so that relative paths such as "../pictures" also resolve when the
# code is run interactively.
setwd(presentation_path)

# Directory that receives the downloaded data sets.
Dataset_path <- file.path(project_path, "Dataset")

# Newest month (YYYY-MM) to include when scraping.
selected_date <- "2024-04"

# The working directory for chunks is the knitr *package* option `root.dir`;
# a chunk option named `root` is not used by knitr.
knitr::opts_knit$set(root.dir = presentation_path)

Definiowanie paczek

library(pander)
panderOptions('digits',7)
library(knitr)
library(rvest)
library(stringi)
library(dplyr)
library(scales)
library(ggplot2)
library(png)
library("patchwork") 
library(forcats)
# Pictures placed on top of the plots further down (read as native rasters).
img  <- readPNG(source = "../pictures/queens_gambit.png", native = TRUE)
img2 <- readPNG(source = "../pictures/candidates_tournament.png", native = TRUE)
img3 <- readPNG(source = "../pictures/covid.png", native = TRUE)
img4 <- readPNG(source = "../pictures/opening_icon.png", native = TRUE)

Wybór tablicy do scrapowania

# Page to scrape and the XPath of the table node on that page.
url <- "https://database.lichess.org"
path <- "/html/body/div/div[2]/div/section[1]/table"

# Download the page and keep only the selected table node.
wezel <- html_node(x = read_html(url), xpath = path)

Java-script dla przycisku

// Collapsible "my_init" section: hidden when the script runs, toggled by
// hideMy_init().
const div_my_init = document.getElementById("my_init");
// Must reference div_my_init; there is no variable named div_init.
div_my_init.style.display = 'none';

var button_my_init = document.getElementById("button_my_init");

/**
 * Toggle the "my_init" section and update the button caption:
 * "Ukryj" while the section is shown, "Pokaż" while it is hidden.
 */
function hideMy_init() {
  if (div_my_init.style.display === 'none') {
    div_my_init.style.display = 'block';
    button_my_init.innerHTML = "Ukryj";
  } else {
    div_my_init.style.display = 'none';
    button_my_init.innerHTML = "Pokaż";
  }
}

Czy szachy są coraz popularniejsze?

Scrapowanie danych z lichess

# Builds Data_games_count: one row per scraped link with its year and the
# number of games, plus first_index_of_years used for the x-axis breaks.

# take the third column of the scraped table (number of games played per month)
games_count_html = html_table(wezel)[[3]]

# strip the thousands separators, convert to numbers and drop the last element
# NOTE(review): the dropped last entry is presumably a summary row - confirm
my_games_count <- data.frame(count=as.numeric(stri_replace_all(games_count_html,"",regex = "\\,"))[-length(games_count_html)])

# collect every hyperlink of the table and turn it into an absolute URL
hyperlinks = html_nodes(wezel, "a")
my_href = html_attr(hyperlinks, "href")
links = data.frame(links = paste(url,my_href,sep="/"))

# keep every second link, starting with the second one (the .zst.torrent links)
init_links_for_download = links[seq(2,nrow(links), by=2), 1]

# extract the year (text between "rated_" and the first "-") from the links

my_year_from_imported_links = data.frame(year=data.frame(stri_match_all(data.frame(init_links_for_download),
                                   regex = "rated_\\s*(.*?)\\s*[-]"))[,2])


Data_games_count = cbind(my_year_from_imported_links,my_games_count)

# for every distinct year, find the first position of that year in the reversed data set
first_index_of_years=c()

for(i in unique(my_year_from_imported_links)[,]){
first_index_of_years = append(first_index_of_years,which(rev(Data_games_count$year)==i)[1])
}

Wizualizacja

# Bar chart of the number of games per scraped month, filled by year.
# The x positions are reversed row numbers; the axis is labelled with one
# tick per year.
my_plot <- ggplot(
  data = Data_games_count,
  mapping = aes(x = nrow(my_games_count):1, y = count)
) +
  # one bar per row, height taken directly from `count`
  geom_bar(mapping = aes(fill = year), stat = "identity") +
  # titles
  labs(
    title = "Number of chess games played on the lichess website",
    x = "Years",
    y = "Number of chess games"
  ) +
  # legend; the year strings themselves are passed as the fill values
  scale_fill_manual(values = unique(Data_games_count$year), name = "Years") +
  expand_limits(x = c(0, NA), y = c(0, NA)) +
  scale_x_continuous(
    breaks = rev(first_index_of_years),
    labels = rev(unique(my_year_from_imported_links)[, ])
  ) +
  # y axis in millions
  scale_y_continuous(labels = unit_format(unit = "M", scale = 1e-6)) +
  theme(
    text = element_text(size = 20),
    axis.text.x = element_text(angle = 90, hjust = 1)
  )
my_plot

ggsave("../poster/Data_games_countv1.png")

Szukanie powodów znacznego wzrostu popularności szachów w 2020 roku

# strzalka

# Same chart as `my_plot`, with an arrow and three pictures placed on top.
my_plot +
  # arrow
  geom_segment(
    mapping = aes(x = 70, y = 68000000, xend = 86, yend = 62000000),
    linewidth = 1.2,
    arrow = arrow(length = unit(5, "mm"))
  ) +
  # pictures (positions are fractions of the plot area)
  inset_element(p = img, left = 0.02, bottom = 0.54, right = 0.42, top = 0.9) +
  inset_element(p = img2, left = 0.05, bottom = 0.29, right = 0.5, top = 0.52) +
  inset_element(p = img3, left = 0.44, bottom = 0.69, right = 0.65, top = 0.82)

ggsave("../poster/Data_games_count.png")

Wnioski:

  • Szachy zyskują na popularności, ale czy na pewno dzięki internetowi i cyfryzacji?

Jak serial “Gambit królowej” wpłynął na popularyzację szachów?

Ciekawostka:

  • “Gambit królowej” to również nazwa debiutu szachowego. Debiut szachowy z kolei to początkowe posunięcia w szachach.

Gambit Królowej

Ile było granych debiutów “Gambit królowej”?

Tworzenie struktury zbioru danych

wyodrębnianie tagów oraz hiperłączy

# All <a> nodes of the scraped table and their href attributes.
hyperlinks <- html_nodes(wezel, "a")
my_href <- html_attr(hyperlinks, "href")

# Preview: one row per link, one column per attribute.
pander(
  head(
    bind_rows(
      lapply(
        html_attrs(hyperlinks),
        function(node_attrs) {
          data.frame(as.list(node_attrs), stringsAsFactors = FALSE)
        }
      )
    )
  )
)
href
standard/lichess_db_standard_rated_2024-05.pgn.zst
standard/lichess_db_standard_rated_2024-05.pgn.zst.torrent
standard/lichess_db_standard_rated_2024-04.pgn.zst
standard/lichess_db_standard_rated_2024-04.pgn.zst.torrent
standard/lichess_db_standard_rated_2024-03.pgn.zst
standard/lichess_db_standard_rated_2024-03.pgn.zst.torrent

Wybór wersji pobierania (.zst.torrent) w funkcji ograniczającej scrapowanie względem wybranej daty

# Return the .zst.torrent links from the month matching `date` to the end of
# the link list.
#
# date: regular expression matched against the YYYY-MM part of each link,
#       e.g. "2024-04".
# Returns a character vector of links, starting at the first matching month.
# Stops with an error when no link matches `date`.
choose_max_date_to_scrap <- function(date){

  # every second link is the .zst.torrent variant, so keep only those
  init_links_for_download <- links[seq(2, nrow(links), by = 2), 1]

  full_date_from_imported_links <- data.frame(stri_match_all(data.frame(init_links_for_download),
                                     regex = "rated_\\s*(.*?)\\s*[.]"))[,2]

  date_location <- which(stri_detect(full_date_from_imported_links, regex = date))

  if (length(date_location) == 0L) {
    stop("No dataset link matches the date '", date, "'.", call. = FALSE)
  }

  # When several months match, start at the first one (what `:` used to do,
  # but without the warning).
  first_match <- date_location[1]

  init_links_for_download[first_match:length(init_links_for_download)]
}

links_for_download <- choose_max_date_to_scrap(selected_date)
pander(data.frame(prepared_links = head(links_for_download)))
prepared_links
https://database.lichess.org/standard/lichess_db_standard_rated_2024-04.pgn.zst.torrent
https://database.lichess.org/standard/lichess_db_standard_rated_2024-03.pgn.zst.torrent
https://database.lichess.org/standard/lichess_db_standard_rated_2024-02.pgn.zst.torrent
https://database.lichess.org/standard/lichess_db_standard_rated_2024-01.pgn.zst.torrent
https://database.lichess.org/standard/lichess_db_standard_rated_2023-12.pgn.zst.torrent
https://database.lichess.org/standard/lichess_db_standard_rated_2023-11.pgn.zst.torrent

Wyodrębnianie daty ze scrapowania

# Text between "rated_" and the first "." of every selected link.
full_date_from_imported_links <- data.frame(
  stri_match_all(
    data.frame(links_for_download),
    regex = "rated_\\s*(.*?)\\s*[.]"
  )
)[, 2]
pander(head(data.frame(full_date_from_imported_links)))
full_date_from_imported_links
2024-04
2024-03
2024-02
2024-01
2023-12
2023-11

Wyodrębnianie roku, miesięcy i nazw miesięcy ze scrapowania

# Text between "rated_" and the first "-" of every selected link.
year_from_imported_links <- data.frame(
  stri_match_all(
    data.frame(links_for_download),
    regex = "rated_\\s*(.*?)\\s*[-]"
  )
)[, 2]
pander(head(data.frame(year_from_imported_links)))
year_from_imported_links
2024
2024
2024
2024
2023
2023
# Text between "-" and the next "." of every selected link.
month_from_imported_links <- data.frame(
  stri_match_all(
    data.frame(links_for_download),
    regex = "-\\s*(.*?)\\s*[.]"
  )
)[, 2]
pander(head(data.frame(month_from_imported_links)))
month_from_imported_links
04
03
02
01
12
11
# Abbreviated English month names looked up by month number.
month_numbers <- as.integer(month_from_imported_links)
my_month_names <- month.abb[month_numbers]
pander(head(data.frame(my_month_names)))
my_month_names
Apr
Mar
Feb
Jan
Dec
Nov

Tworzenie struktury plików - lata

# One folder path per distinct year found in the selected links.
my_paths_year <- unique(paste0(Dataset_path, "/", year_from_imported_links))
pander(data.frame(my_paths_year))
my_paths_year
D:/moje/projekty/chess-in-the-digital-age/Dataset/2024
D:/moje/projekty/chess-in-the-digital-age/Dataset/2023
D:/moje/projekty/chess-in-the-digital-age/Dataset/2022
D:/moje/projekty/chess-in-the-digital-age/Dataset/2021
D:/moje/projekty/chess-in-the-digital-age/Dataset/2020
D:/moje/projekty/chess-in-the-digital-age/Dataset/2019
D:/moje/projekty/chess-in-the-digital-age/Dataset/2018
D:/moje/projekty/chess-in-the-digital-age/Dataset/2017
D:/moje/projekty/chess-in-the-digital-age/Dataset/2016
D:/moje/projekty/chess-in-the-digital-age/Dataset/2015
D:/moje/projekty/chess-in-the-digital-age/Dataset/2014
D:/moje/projekty/chess-in-the-digital-age/Dataset/2013
# Create every folder listed in the global `my_paths_year`.
#
# Missing parent folders are created as well, and folders that already exist
# are skipped, so the function can be called repeatedly without warnings.
# Returns NULL invisibly.
create_folders_year <- function(){
  for (year_dir in my_paths_year) {
    if (!dir.exists(year_dir)) {
      dir.create(year_dir, recursive = TRUE)
    }
  }
}

Tworzenie struktury plików - miesiące

# One folder path per selected month: <Dataset>/<year>/<MM>. <Mon>
my_paths_month <- paste0(
  Dataset_path, "/", year_from_imported_links, "/",
  month_from_imported_links, ". ", my_month_names
)
pander(head(data.frame(my_paths_month)))
my_paths_month
D:/moje/projekty/chess-in-the-digital-age/Dataset/2024/04. Apr
D:/moje/projekty/chess-in-the-digital-age/Dataset/2024/03. Mar
D:/moje/projekty/chess-in-the-digital-age/Dataset/2024/02. Feb
D:/moje/projekty/chess-in-the-digital-age/Dataset/2024/01. Jan
D:/moje/projekty/chess-in-the-digital-age/Dataset/2023/12. Dec
D:/moje/projekty/chess-in-the-digital-age/Dataset/2023/11. Nov
# Create every folder listed in the global `my_paths_month`.
#
# Missing parent folders are created as well, and folders that already exist
# are skipped, so the function can be called repeatedly without warnings.
# Returns NULL invisibly.
create_folders_month <- function(){
  for (month_dir in my_paths_month) {
    if (!dir.exists(month_dir)) {
      dir.create(month_dir, recursive = TRUE)
    }
  }
}

Definiowanie ścieżek do których będą pobierane pliki i definiowanie ich docelowych nazw

# Destination file for every selected link:
# <Dataset>/<year>/<MM>. <Mon>/lichess_db_standard_rated_<YYYY-MM>.pgn.zst.torrent
my_paths <- paste0(
  Dataset_path, "/", year_from_imported_links, "/",
  month_from_imported_links, ". ", my_month_names,
  "/lichess_db_standard_rated_", full_date_from_imported_links,
  ".pgn.zst.torrent"
)
pander(head(data.frame(my_paths)))
my_paths
D:/moje/projekty/chess-in-the-digital-age/Dataset/2024/04. Apr/lichess_db_standard_rated_2024-04.pgn.zst.torrent
D:/moje/projekty/chess-in-the-digital-age/Dataset/2024/03. Mar/lichess_db_standard_rated_2024-03.pgn.zst.torrent
D:/moje/projekty/chess-in-the-digital-age/Dataset/2024/02. Feb/lichess_db_standard_rated_2024-02.pgn.zst.torrent
D:/moje/projekty/chess-in-the-digital-age/Dataset/2024/01. Jan/lichess_db_standard_rated_2024-01.pgn.zst.torrent
D:/moje/projekty/chess-in-the-digital-age/Dataset/2023/12. Dec/lichess_db_standard_rated_2023-12.pgn.zst.torrent
D:/moje/projekty/chess-in-the-digital-age/Dataset/2023/11. Nov/lichess_db_standard_rated_2023-11.pgn.zst.torrent
# Create the year and month folders, then download every link in
# `links_for_download` to the matching entry of `my_paths`.
#
# Files are written in binary mode. A 0.2 s pause follows each download.
# Returns NULL invisibly.
downloading <- function(){
  create_folders_year()
  create_folders_month()
  for (j in seq_along(my_paths)) {
    download.file(links_for_download[j], my_paths[j], mode = "wb")
    # The pause is its own statement; passed inside the call it would be
    # taken as download.file()'s `method` argument.
    Sys.sleep(0.2)
  }
}

Tworzenie struktury plików i pobieranie do niej zbiorów danych

# Create the year/month folders and download every selected link into them.
downloading()

Java-script dla przycisku

// Section "tworzenie-struktury": hidden when the script runs.
const my_div1 = document.getElementById("tworzenie-struktury");
my_div1.style.display = 'none';

var button = document.getElementById("my_button");

// Show the section when it is hidden and hide it otherwise; the button
// caption is "Ukryj" while shown and "Pokaż" while hidden.
function myFunction() {
  const isHidden = my_div1.style.display === 'none';
  my_div1.style.display = isHidden ? 'block' : 'none';
  button.innerHTML = isHidden ? "Ukryj" : "Pokaż";
}

Przygotowywanie zbiorów danych do użycia

Pobieranie informacji o liczbie partii poprzez scrapowanie oraz ograniczenie tego scrapowania względem wybranej daty

# Return the scraped game counts from the month matching `date` to the end
# of the link list.
#
# date: regular expression matched against the YYYY-MM part of each
#       .zst.torrent link, e.g. "2024-04".
# Returns the selected entries of the third column of the scraped table
# (as scraped, with thousands separators).
# Stops with an error when no link matches `date`.
choose_max_date_to_scrap_game_count <- function(date){
  # third column of the scraped table
  my_games_count <- html_table(wezel)[[3]]

  # every second link is the .zst.torrent variant
  init_links_for_download <- links[seq(2, nrow(links), by = 2), 1]

  # extract the dates from the links
  full_date_from_imported_links <- data.frame(stri_match_all(data.frame(init_links_for_download),
                                     regex = "rated_\\s*(.*?)\\s*[.]"))[,2]

  # positions of the links whose date matches the requested one
  date_location <- which(stri_detect(full_date_from_imported_links, regex = date))

  if (length(date_location) == 0L) {
    stop("No dataset link matches the date '", date, "'.", call. = FALSE)
  }

  # When several months match, start at the first one (what `:` used to do,
  # but without the warning).
  first_match <- date_location[1]

  my_games_count[first_match:length(init_links_for_download)]
}

games_count <- choose_max_date_to_scrap_game_count(selected_date)
pander(head(data.frame(games_count)))
games_count
91,377,787
95,804,114
91,567,975
98,994,760
96,909,211
92,389,636

funkcja do ograniczania danych dla konkretnego zbioru względem miesiąca

# Cut the decompressed PGN dump of one month down to a sample and store it
# as Data_<date>.pgn in that month's folder.
#
# date:      regular expression that must match exactly one entry of
#            `full_date_from_imported_links`, e.g. "2021-12".
# data_size: fraction used to derive the number of lines to read
#            (games in that month * 18 * data_size).
# Returns the last rows of the written file, read back for inspection.
# Side effects: writes Data_<date>.pgn and the marker "ograniczono_dane.txt".
# Stops when the date is missing or ambiguous, or when the lines read do not
# contain a complete game.
preparing_month_dataset <- function(date, data_size = 0.001){

  # position of the requested month
  searching_location <- which(stri_detect(full_date_from_imported_links, regex = date))
  if (length(searching_location) != 1L) {
    stop("'", date, "' must match exactly one scraped month, but it matches ",
         length(searching_location), ".", call. = FALSE)
  }

  # folder of the requested month (shared by every file handled below)
  month_dir <- paste(Dataset_path,"/",year_from_imported_links[searching_location],"/",
                     month_from_imported_links[searching_location],". ",
                     my_month_names[searching_location], sep="")

  # number of lines to read, derived from the game count of that month
  decreasing_game_number <- round((as.numeric(stri_replace_all(games_count,"",regex = "\\,"))*18*data_size)[searching_location])

  pgn_file_to_read <- paste(month_dir,"/lichess_db_standard_rated_",
                            full_date_from_imported_links[searching_location],".pgn", sep="")

  # read at most that many lines
  my_pgn <- read.table(pgn_file_to_read,
                   quote="", sep="\n", stringsAsFactors=FALSE, nrows = decreasing_game_number)

  # the file may be shorter than requested, so work with what was read
  lines_read <- nrow(my_pgn)
  window_start <- max(1L, lines_read - 22L)

  # tag names of the last (up to) 23 lines
  colnms <- sub("\\[(\\w+).+", "\\1", my_pgn[window_start:lines_read,1])

  # lines inside that window which start a new game
  Event_location <- which(stri_detect(colnms, regex = "Event"))
  if (length(Event_location) == 0L) {
    stop("No game start found near the end of the lines read from ",
         pgn_file_to_read, ".", call. = FALSE)
  }

  # keep everything before the first game start found in the window
  last_complete_line <- window_start + Event_location[1] - 2L
  if (last_complete_line < 1L) {
    stop("The lines read from ", pgn_file_to_read,
         " do not contain a complete game.", call. = FALSE)
  }
  my_pgn2 <- my_pgn[seq_len(last_complete_line),]

  # write the reduced data set
  pgn_file_to_write <- paste(month_dir,"/Data_",date,".pgn", sep="")
  write.table(my_pgn2,pgn_file_to_write,col.names = FALSE,row.names = FALSE,quote = FALSE)

  # marker file recording that this month was reduced
  file_txt_for_info <- paste(month_dir,"/ograniczono_dane.txt", sep="")
  write.table(" ",file_txt_for_info)

  # read the result back so the caller can inspect its end
  testing_last_char <- read.table(pgn_file_to_write,
                   quote="", sep="\n", stringsAsFactors=FALSE)
  tail(testing_last_char)
}

funkcja do usuwania oryginalnego zbioru danych względem miesiąca

# Delete the original dump of one month (.pgn.zst and .pgn) and leave the
# marker file "usunieto_oryginal.txt" in that month's folder.
#
# date: regular expression that must match exactly one entry of
#       `full_date_from_imported_links`, e.g. "2013-01". Requiring a single
#       match keeps a loose pattern from deleting several months at once.
# Returns NULL invisibly. The marker is written only when neither original
# file is left on disk.
removing_oryginal_dataset <- function(date){
  # position of the requested month
  searching_location <- which(stri_detect(full_date_from_imported_links, regex = date))
  if (length(searching_location) != 1L) {
    stop("'", date, "' must match exactly one scraped month, but it matches ",
         length(searching_location), ".", call. = FALSE)
  }

  month_dir <- paste(Dataset_path,"/",year_from_imported_links[searching_location],"/",
                     month_from_imported_links[searching_location],". ",
                     my_month_names[searching_location], sep="")
  original_base <- paste(month_dir,"/lichess_db_standard_rated_",
                         full_date_from_imported_links[searching_location], sep="")

  # files to delete
  files_to_remove <- c(paste(original_base,".pgn.zst", sep=""),
                       paste(original_base,".pgn", sep=""))

  # only touch files that are present, so a repeated call does not warn
  present <- files_to_remove[file.exists(files_to_remove)]
  if (length(present) > 0L) {
    file.remove(present)
  }

  if (any(file.exists(files_to_remove))) {
    warning("Could not remove every original file in ", month_dir, call. = FALSE)
    return(invisible(NULL))
  }

  # marker file recording that the originals are gone
  file_txt_for_info <- paste(month_dir,"/usunieto_oryginal.txt", sep="")
  write.table(" ",file_txt_for_info)
  invisible(NULL)
}

Wywoływanie funkcji przygotowującej zbiór z konkretnego miesiąca do późniejszej analizy

# Reduce the 2021-12 dump with the default data_size (0.001).
preparing_month_dataset("2021-12")

# Delete the original 2013-01 files (.pgn.zst and .pgn).
removing_oryginal_dataset("2013-01")

Java-script dla przycisku

// Section "przygotowywanie-zbiorow": hidden when the script runs.
const my_div2 = document.getElementById("przygotowywanie-zbiorow");
my_div2.style.display = 'none';

var button2 = document.getElementById("my_button2");

// Show the section when it is hidden and hide it otherwise; the button
// caption is "Ukryj kod" while shown and "Pokaż kod" while hidden.
function myFunction2() {
  const isHidden = my_div2.style.display === 'none';
  my_div2.style.display = isHidden ? 'block' : 'none';
  button2.innerHTML = isHidden ? "Ukryj kod" : "Pokaż kod";
}

Liczenie zagranych debiutów “Gambit królowej” na przestrzeni lat

# command that counts how often each tag (column) occurs
# zmienne = fct_count(fct_infreq(sub("\\[(\\w+).+", "\\1", pgn[1:nrow(pgn.df),1])))

# One reduced PGN file per selected month.
files_to_read <- paste0(
  Dataset_path, "/", year_from_imported_links, "/",
  month_from_imported_links, ". ", my_month_names,
  "/Data_", full_date_from_imported_links, ".pgn"
)

# Share (in %) of games per file whose Opening tag mentions "Queen's Gambit".
Percent_games_count <- c()

for (i in seq_along(files_to_read)) {
  pgn <- read.table(files_to_read[i], quote = "", sep = "\n", stringsAsFactors = FALSE)

  # tag values with the surrounding [Tag "..."] stripped
  pgn.df <- data.frame(
    matrix(sub("\\[\\w+ \\\"(.+)\\\"\\]", "\\1", pgn[, 1]), byrow = TRUE, ncol = 1)
  )

  # rows holding the Opening tag
  opening_index <- which(sub("\\[(\\w+).+", "\\1", pgn[1:nrow(pgn.df), 1]) == "Opening")
  opening_names <- pgn.df[opening_index, ]

  All_games_count <- length(opening_names)

  Queens_Gambit_games_count <- sum(
    stri_detect(opening_names, regex = "Queen's Gambit"),
    na.rm = TRUE
  )

  Percent_games_count <- c(
    Percent_games_count,
    (Queens_Gambit_games_count / All_games_count) * 100
  )
}

Przewidywania:

  • Skoro termin “Gambit królowej” zyskał rozgłos dzięki serialowi, to debiut o tej nazwie powinien występować częściej po 2020 roku procentowo w stosunku do całości.

Wizualizacja

# Percentages per month paired with the year of that month.
my_Percent_games_count <- data.frame(count = Percent_games_count)

Data_Queens_gambit_games_count <- cbind(
  data.frame(year = year_from_imported_links),
  my_Percent_games_count
)

# Bar chart of the share of "Queen's Gambit" openings, filled by year.
ggplot(
  data = Data_Queens_gambit_games_count,
  mapping = aes(x = length(games_count):1, y = count)
) +
  # one bar per row, height taken directly from `count`
  geom_bar(mapping = aes(fill = year), stat = "identity") +
  # titles
  labs(
    title = "Number of 'Queen's Gambit' openings played",
    x = "Years",
    y = "Number of chess games ( % ) "
  ) +
  # legend; the year strings themselves are passed as the fill values
  scale_fill_manual(
    values = unique(Data_Queens_gambit_games_count$year),
    name = "Years"
  ) +
  expand_limits(x = c(0, NA), y = c(0, NA)) +
  scale_x_continuous(
    breaks = rev(first_index_of_years),
    labels = rev(unique(year_from_imported_links))
  ) +
  scale_y_continuous(labels = unit_format(unit = "%", scale = 1)) +
  theme(
    text = element_text(size = 20),
    axis.text.x = element_text(angle = 90, hjust = 1)
  )

ggsave("../poster/Data_Queens_gambit_games_count.png")

Obserwacje:

  • Widać wyraźnie malejący odsetek partii rozpoczętych debiutem “Gambit królowej” po dacie premiery serialu.

Wnioski:

  • Serial znacząco zwiększył popularność szachów wśród osób, które nie wiedzą, co znaczy termin “Gambit królowej”.

Czy serial wpłynął tylko na nowicjuszy szachowych?

Wyszukiwanie Ilości zagranych partii przez nowicjuszy (ranga < 1200)

# One reduced PGN file per selected month.
files_to_read <- paste0(
  Dataset_path, "/", year_from_imported_links, "/",
  month_from_imported_links, ". ", my_month_names,
  "/Data_", full_date_from_imported_links, ".pgn"
)

# Share (in %) of games per file whose WhiteElo is below 1200.
novice_games_count <- c()

for (i in seq_along(files_to_read)) {
  pgn <- read.table(files_to_read[i], quote = "", sep = "\n", stringsAsFactors = FALSE)

  # tag values with the surrounding [Tag "..."] stripped
  pgn.df <- data.frame(
    matrix(sub("\\[\\w+ \\\"(.+)\\\"\\]", "\\1", pgn[, 1]), byrow = TRUE, ncol = 1)
  )

  # rows holding the WhiteElo tag
  elo_index <- which(sub("\\[(\\w+).+", "\\1", pgn[1:nrow(pgn.df), 1]) == "WhiteElo")

  All_games_count <- length(pgn.df[elo_index, ])

  # values that cannot be converted to a number are not counted
  my_novice_games_count <- sum(as.numeric(pgn.df[elo_index, ]) < 1200, na.rm = TRUE)

  novice_games_count <- c(
    novice_games_count,
    (my_novice_games_count / All_games_count) * 100
  )
}

Wizualizacja

# Percentages per month paired with the year of that month.
# `novice_games_count` is the vector filled by the loop over files_to_read.
my_Percent_novice_games_count <- data.frame(count = novice_games_count)

Data_novice_games_count <- cbind(
  data.frame(year = year_from_imported_links),
  my_Percent_novice_games_count
)

# Bar chart of the share of novice games, filled by year, with an arrow and
# a picture on top.
ggplot(Data_novice_games_count, aes(x = rev(1:length(games_count)), y = count)) +

  # chart type
  geom_bar(stat = "identity", aes(fill = year)) +

  # titles
  labs(x = "Years", y = "Number of chess games ( % ) ", title = "Number of novice games (rank < 1200) ") +

  # legend
  scale_fill_manual(name = "Years", values = unique(Data_novice_games_count$year)) +
  expand_limits(x = c(0, NA), y = c(0, NA)) +
  scale_x_continuous(breaks = c(rev(first_index_of_years)),
                     labels = c(rev(unique(year_from_imported_links)))) +
  scale_y_continuous(labels = unit_format(unit = "%", scale = 1)) +
  theme(text = element_text(size = 20),
        axis.text.x = element_text(angle = 90, hjust = 1)) +

  # arrow
  geom_segment(aes(58, 10.68, xend = 95, yend = 10),
               linewidth = 1.2,
               arrow = arrow(length = unit(5, "mm"))) +

  # picture
  inset_element(p = img,
                left = 0.02,
                bottom = 0.54,
                right = 0.42,
                top = 0.9)

ggsave("../poster/Data_novice_games_count.png")

Obserwacje:

  • Widać znaczny napływ nowicjuszy po dacie premiery serialu

Wniosek:

  • Po obejrzeniu serialu osoby, które nie znają się na szachach, chcą po prostu zagrać w tę grę i nie interesują się debiutami. To powoduje chęć zagrania na stronie lichess jako nowicjusz.

Wyszukiwanie Ilości zagranych partii przez zaawansowanych graczy (ranga > 1800)

# One reduced PGN file per selected month.
files_to_read <- paste0(
  Dataset_path, "/", year_from_imported_links, "/",
  month_from_imported_links, ". ", my_month_names,
  "/Data_", full_date_from_imported_links, ".pgn"
)

# Share (in %) of games per file whose WhiteElo is above 1800.
pro_games_count <- c()

for (i in seq_along(files_to_read)) {
  pgn <- read.table(files_to_read[i], quote = "", sep = "\n", stringsAsFactors = FALSE)

  # tag values with the surrounding [Tag "..."] stripped
  pgn.df <- data.frame(
    matrix(sub("\\[\\w+ \\\"(.+)\\\"\\]", "\\1", pgn[, 1]), byrow = TRUE, ncol = 1)
  )

  # rows holding the WhiteElo tag
  elo_index_v2 <- which(sub("\\[(\\w+).+", "\\1", pgn[1:nrow(pgn.df), 1]) == "WhiteElo")

  All_games_count_v2 <- length(pgn.df[elo_index_v2, ])

  # values that cannot be converted to a number are not counted
  my_pro_games_count <- sum(as.numeric(pgn.df[elo_index_v2, ]) > 1800, na.rm = TRUE)

  pro_games_count <- c(
    pro_games_count,
    (my_pro_games_count / All_games_count_v2) * 100
  )
}

Wizualizacja

# Percentages per month paired with the year of that month.
# `pro_games_count` is the vector filled by the loop over files_to_read.
my_Percent_pro_games_count <- data.frame(count = pro_games_count)

Data_pro_games_count <- cbind(
  data.frame(year = year_from_imported_links),
  my_Percent_pro_games_count
)

# Bar chart of the share of games above 1800, filled by year, with an arrow
# and a picture on top.
ggplot(Data_pro_games_count, aes(x = rev(1:length(games_count)), y = count)) +

  # chart type
  geom_bar(stat = "identity", aes(fill = year)) +

  # titles
  labs(x = "Years", y = "Number of chess games ( % ) ", title = "Number of pro games (rank > 1800) ") +

  # legend
  scale_fill_manual(name = "Years", values = unique(Data_pro_games_count$year)) +
  expand_limits(x = c(0, NA), y = c(0, NA)) +
  scale_x_continuous(breaks = c(rev(first_index_of_years)),
                     labels = c(rev(unique(year_from_imported_links)))) +
  scale_y_continuous(labels = unit_format(unit = "%", scale = 1)) +
  theme(text = element_text(size = 20),
        axis.text.x = element_text(angle = 90, hjust = 1)) +

  # arrow
  geom_segment(aes(65, 38, xend = 86, yend = 37),
               linewidth = 1.2,
               arrow = arrow(length = unit(5, "mm"))) +

  # picture
  inset_element(p = img3,
                left = 0.39,
                bottom = 0.84,
                right = 0.6,
                top = 0.97)

ggsave("../poster/Data_pro_games_count.png")

Obserwacje:

  • Widać spadek ilości zaawansowanych graczy po 2020 roku w stosunku do całości

Wnioski:

  • Serial wpłynął na popularyzację szachów, głównie zachęcając do gry nowicjuszy.

Dlaczego serial nie wpłynął na zaawansowanych graczy?

Czy dodanie do szachów nowości wpłynie na zaawansowanych graczy?

Tworzenie nowego debiutu

Statystyki popularności Anny Cramling na twitch

Obserwacje:

Widać wyraźny wzrost popularności streamerki:
  • kiedy opublikowała swój debiut szachowy na youtube

Wyszukiwanie debiutów po sekwencji posunięć, zamiast po nazwie

Wcześniejsza wersja kodu

# filtred_two_moves_to_compare = two_moves_to_compare[c(5,6) + rep(seq(0, length(two_moves_to_compare), 6), each = 2)]

# files_to_read = paste(Dataset_path,"/",year_from_imported_links,"/",month_from_imported_links,". ",my_month_names,"/Data_",full_date_from_imported_links,".pgn", sep="")

# Earlier approach: collect the positions of games whose first five white
# moves are exactly the set e3, d3, Ne2, Nd2, Ng3 (in any order).
my_opening <- c()

for (i in 1:1) {
  pgn <- read.table(files_to_read[i], quote = "", sep = "\n", stringsAsFactors = FALSE)

  # rows that are not emptied by the tag pattern, i.e. the move lines
  moves <- which(sub("\\[(\\b+).+", "\\1", pgn[1:nrow(pgn), 1]) != "")

  # text from move 1 up to move 6; full match and capture group alternate
  my_five_moves <- unlist(stri_match_all(pgn[moves, ], regex = "1. \\s*(.*?)\\s* 6. "))

  # keep the full matches only
  filtred_five_moves <- my_five_moves[seq(1, length(my_five_moves), by = 2)]

  # match every game once, instead of once per loop iteration
  white_move_matches <- stri_match_all(filtred_five_moves, regex = "(\\d. (\\w+))")

  my_opening_index <- c()

  # separate index so the outer loop variable `i` is not overwritten
  for (k in seq_along(filtred_five_moves)) {
    five_moves_to_compare <- white_move_matches[[k]][, 3]
    if (setequal(five_moves_to_compare, c("e3", "d3", "Ne2", "Nd2", "Ng3"))) {
      my_opening_index <- append(my_opening_index, k)
    }
  }
}

Java-script dla przycisku

// Section "searching_v1_div": hidden when the script runs.
const searching_v1_div = document.getElementById("searching_v1_div");
searching_v1_div.style.display = 'none';

var searching_v1 = document.getElementById("searching_v1");

// Show the section when it is hidden and hide it otherwise; the button
// caption is "Ukryj kod" while shown and "Pokaż kod" while hidden.
function searching() {
  const isHidden = searching_v1_div.style.display === 'none';
  searching_v1_div.style.display = isHidden ? 'block' : 'none';
  searching_v1.innerHTML = isHidden ? "Ukryj kod" : "Pokaż kod";
}

Nowsza wersja kodu

#files_to_read = paste(Dataset_path,"/",year_from_imported_links,"/",month_from_imported_links,". ",my_month_names,"/Data_",full_date_from_imported_links,".pgn", sep="")

# Share (in %) of games per file in `files_to_read` in which white's moves
# 1..length(searching_moves) equal `searching_moves`, in that order.
#
# searching_moves: character vector of white moves, e.g. c("e3", "d3").
# Returns a numeric vector with one percentage per file.
search_by_moves <- function(searching_moves){
  opening_count <- numeric(length(files_to_read))

  for (i in seq_along(files_to_read)) {
    pgn <- read.table(files_to_read[i], quote = "", sep = "\n", stringsAsFactors = FALSE)

    # rows that are not emptied by the tag pattern, i.e. the move lines
    moves <- which(sub("\\[(\\b+).+", "\\1", pgn[1:nrow(pgn), 1]) != "")

    All_games_count <- length(moves)

    # narrow the candidate games down move by move
    for (j in seq_along(searching_moves)) {
      my_regex <- paste("(.*?)\\s*", j, ". (\\w+).+", sep = "")
      moves_to_compare <- sub(my_regex, "\\2", pgn[moves, ])
      moves <- moves[which(moves_to_compare == searching_moves[j])]
    }

    opening_count[i] <- (length(moves) / All_games_count) * 100
  }
  opening_count
}

# Keep the result: the plot code that follows reads `opening_count`.
opening_count <- search_by_moves(c("e3","d3","Ne2","Nd2","Ng3"))
opening_count

Wizualizacja

# Percentages per month paired with the year of that month.
my_opening_count <- data.frame(count = opening_count)

Data_opening_count <- cbind(
  data.frame(year = year_from_imported_links),
  my_opening_count
)

# Bar chart of the share of games with the searched move sequence, filled by
# year, with an arrow and a picture on top.
ggplot(
  data = Data_opening_count,
  mapping = aes(x = length(games_count):1, y = count)
) +
  # one bar per row, height taken directly from `count`
  geom_bar(mapping = aes(fill = year), stat = "identity") +
  # titles
  labs(
    title = "Number of 'Cow' move sequences played",
    x = "Years",
    y = "Number of chess games"
  ) +
  # legend; the year strings themselves are passed as the fill values
  scale_fill_manual(values = unique(Data_opening_count$year), name = "Years") +
  expand_limits(x = c(0, NA), y = c(0, 10)) +
  scale_x_continuous(
    breaks = rev(first_index_of_years),
    labels = rev(unique(year_from_imported_links))
  ) +
  theme(
    text = element_text(size = 20),
    axis.text.x = element_text(angle = 90, hjust = 1)
  ) +
  # arrow
  geom_segment(
    mapping = aes(x = 80, y = 3.2, xend = 123, yend = 2.2),
    linewidth = 1.2,
    arrow = arrow(length = unit(5, "mm"))
  ) +
  # picture
  inset_element(p = img4, left = 0.42, bottom = 0.34, right = 0.75, top = 0.51)

ggsave("../poster/Data_opening_count.png")

Obserwacje:

  • Widać wzrost popularności streamerki po opublikowaniu nowego debiutu oraz widać wzrost zainteresowania jej stworzoną sekwencją szachową

Wnioski:

  • Dodawanie do szachów nowości wpływa na aktywizację zaawansowanych graczy

Podsumowanie

Internet i media społecznościowe znacząco wpływają na popularyzację szachów, jednak z różnym natężeniem w zależności od tego, na jak zaawansowaną grupę oddziałują.

Zastosowanie prezentacji

Tworzenie treści w internecie

  • Podczas publikowania treści w internecie, należy być świadomym, że zawsze istnieją grupy, które są zaznajomione z tematyką treści oraz grupy, dla których tworzona treść jest całkowicie nowa.

Wyszukiwanie partii względem wybranej sekwencji

# One reduced PGN file per selected month.
files_to_read <- paste(Dataset_path,"/",year_from_imported_links,"/",month_from_imported_links,". ",my_month_names,"/Data_",full_date_from_imported_links,".pgn", sep="")

# Return the games in which white's moves 1..length(searching_moves) equal
# `searching_moves`, rewritten as "1. <white> 1... <black> 2. <white> ...".
#
# searching_moves:  character vector of white moves, e.g. c("Nf3", "Ng1").
# months_to_search: number of files (from the start of `files_to_read`) to scan.
# Returns a data frame with one character column `moves`, one row per game;
# it has zero rows when no game matches.
# NOTE(review): the move patterns capture `\w+` only, so castling is reported
# as "O" - confirm whether that is intended.
search_games_by_moves <- function(searching_moves, months_to_search){
  games_per_month <- vector("list", months_to_search)

  for (i in seq_len(months_to_search)) {
    pgn <- read.table(files_to_read[i], quote = "", sep = "\n", stringsAsFactors = FALSE)

    # rows that are not emptied by the tag pattern, i.e. the move lines
    moves <- which(sub("\\[(\\b+).+", "\\1", pgn[1:nrow(pgn), 1]) != "")

    # narrow the candidate games down move by move
    for (j in seq_along(searching_moves)) {
      my_regex <- paste("(.*?)\\s*", j, ". (\\w+).+", sep = "")
      moves_to_compare <- sub(my_regex, "\\2", pgn[moves, ])
      moves <- moves[which(moves_to_compare == searching_moves[j])]
    }

    # nothing matched in this month: move on instead of indexing an empty list
    if (length(moves) == 0L) {
      next
    }

    # match all games once, rather than once per game
    move_lines <- pgn[moves, ]
    white_matches <- stri_match_all(move_lines, regex = "(\\d. (\\w+))")
    black_matches <- stri_match_all(move_lines, regex = "(\\d.\\.. (\\w+))")

    games_per_month[[i]] <- vapply(seq_along(move_lines), function(z) {
      moves_detect_whiteV1 <- white_matches[[z]][, 3]
      moves_detect_white <- paste(seq_along(moves_detect_whiteV1), ". ", moves_detect_whiteV1, sep = "")

      moves_detect_blackV1 <- black_matches[[z]][, 3]
      moves_detect_black <- paste(seq_along(moves_detect_blackV1), "... ", moves_detect_blackV1, sep = "")

      # interleave: white moves on odd positions, black moves on even ones
      opening_moves <- character(length(moves_detect_white) + length(moves_detect_black))
      opening_moves[seq(1, length(opening_moves), by = 2)] <- moves_detect_white
      opening_moves[seq(2, length(opening_moves), by = 2)] <- moves_detect_black

      paste(opening_moves, collapse = " ")
    }, character(1))
  }

  all_games <- unlist(games_per_month)
  if (is.null(all_games)) {
    all_games <- character(0)
  }
  data.frame(moves = all_games, stringsAsFactors = FALSE)
}

pander(head(search_games_by_moves(c("Nf3","Ng1"),1)))
moves
1. Nf3 1… d5 2. Ng1 2… Nf6 3. Nf3 3… Bf5 4. Ng1 4… e6 5. e3 5… Be7 6. d4 6… O 7. Nf3 7… c5 8. Bd3 8… Nc6 9. Bxf5 9… exf5 10. O 10… Ne4 11. c3 11… Re8 12. Nbd2 12… a5 13. Nb3 13… Qb6 14. dxc5 14… Nxc5 15. Nxc5 15… Qxc5 16. Nd4 16… Rad8 17. Nxf5 17… Bf8 18. Nd4 18… g6 19. Nxc6 19… bxc6 20. Qf3 20… Bg7 21. Re1 21… a4 22. a3 22… Rb8 23. g3 23… h5 24. e4 24… Rxe4 25. Rxe4 25… dxe4 26. Qxe4 26… Rd8 27. Be3 27… Qb5 28. Qb4 28… Qd5 29. Qxa4 29… Be5 30. Bd4 30… Bxd4 31. cxd4 31… Qxd4 32. Qxd4 32… Rxd4 33. Rc1 33… Rd2 34. b4 34… Ra2 35. Rxc6 35… Rxa3 36. b5 36… Rb3 37. b6 37… Kg7 38. Kg2 38… Rb1 39. Kf3 39… Rb3 40. Ke4 40… Rb4 41. Kd5 41… Rb1 42. Kd6 42… Rd1 43. Kc7 43… Rb1
1. Nf3 1… c5 2. Ng1 2… g6 3. Nf3 3… Bg7 4. Ng1 4… Nc6 5. Nf3 5… d5 6. Ng1 6… Nf6 7. Nf3 7… O 8. d3 8… e5 9. g3 9… Qe7 10. Bg2 10… Be6 11. Nbd2 11… a5 12. c4 12… d4 13. b3 13… Rab8 14. O 14… b5 15. Re1 15… bxc4 16. Nxc4 16… Rb4 17. Ba3 17… Rb5 18. e3 18… Bxc4 19. dxc4 19… Rb6 20. exd4 20… cxd4 21. Bxe7 21… Nxe7 22. Nxe5 22… Ne4 23. Rxe4
1. Nf3 1… e6 2. Ng1 2… Bc5 3. Nf3 3… d6 4. e3 4… Nf6 5. d4 5… Bb6 6. Bd2 6… c5 7. Nc3 7… cxd4 8. exd4 8… Ng4 9. h3 9… Nf6 10. Qe2 10… Ba5 11. O 11… Qc7 12. Qe3 12… d5 13. Ne5 13… Ne4 14. Nb5 14… Bxd2 15. Rxd2 15… Qa5 16. Nd6 16… Nxd6 17. Rd3 17… Nc4 18. Qe1 18… Qb5 19. Rb3 19… Qa4 20. Bxc4 20… dxc4 21. Rb4 21… Qxa2 22. Kd2 22… O 23. Qe3 23… Rd8 24. h4 24… Qa5 25. c3 25… Nc6 26. f4 26… Nxb4 27. cxb4 27… Qxb4
1. Nf3 1… d5 2. Ng1 2… e5 3. g3 3… e4 4. Bg2 4… f5 5. e3 5… f4 6. d3 6… f3 7. Bf1 7… c6 8. dxe4 8… dxe4 9. Qxd8 9… Kxd8 10. Nc3 10… Nf6 11. Bd2 11… Bf5 12. Bc4 12… Nbd7 13. O 13… b5 14. Bb3 14… Kc7 15. a3 15… Nc5 16. Be1 16… Nxb3 17. cxb3 17… a6 18. Kb1 18… a5 19. Na2 19… b4 20. a4 20… Be7 21. Bd2 21… Rhd8 22. Bc1 22… Rxd1 23. h3 23… h5 24. h4 24… Bg4 25. Kc2 25… Rf1 26. Kd2 26… Rxf2 27. Ke1 27… Rg2 28. Nh3 28… Nd5 29. Nf4 29… Nxf4 30. gxf4 30… Bc5
1. Nf3 1… c5 2. Ng1 2… d5 3. g3 3… d4 4. Bg2 4… Bg4 5. d3 5… Bxe2 6. Qxe2 6… Nc6 7. Nf3 7… Nb4 8. O 8… Nf6 9. a3 9… Nc6 10. Qd2 10… e6 11. Qd1 11… Bd6 12. Bg5 12… O 13. h4 13… h6 14. Bxf6 14… Qxf6 15. Nbd2 15… Qg6 16. Ne4 16… Bc7 17. c4 17… dxc3 18. bxc3 18… Rad8 19. Qe2 19… f5 20. Nxc5 20… Bxg3 21. fxg3 21… Qxg3 22. Qxe6 22… Kh7 23. d4 23… Rfe8 24. Qc4 24… Rf8 25. Ne6 25… g5 26. Nxf8 26… Rxf8 27. Ne5 27… gxh4 28. Nxc6 28… bxc6 29. Rae1 29… h3 30. Re7 30… Kg6 31. Qe6 31… Kh5 32. Kh1 32… Qxg2
1. Nf3 1… e6 2. Ng1 2… Be7 3. f4 3… d6 4. Nf3 4… c6 5. e3 5… b6 6. d4 6… Bb7 7. Bd3 7… Nd7 8. O 8… Ngf6 9. c3 9… O 10. Bc2 10… c5 11. Nbd2 11… cxd4 12. exd4 12… e5 13. Nb1 13… exf4 14. Bxf4 14… Nd5 15. Nbd2 15… Nxf4 16. Ne4 16… Nd5 17. Qd3 17… N5f6 18. Nxf6 18… Nxf6 19. Ng5 19… g6 20. Rxf6 20… Bxf6 21. Nf3 21… Re8 22. Rf1 22… Be4 23. Qd1 23… Bxc2 24. Qxc2 24… d5 25. Qf2 25… b5 26. Qg3 26… Bg7 27. Qh3 27… Qb6 28. Ng5 28… Bxd4 29. Kh1 29… h6 30. Qf3 30… Bg7 31. Qxf7 31… Kh8 32. Ne6 32… Rxe6 33. Qf8 33… Rxf8 34. Rxf8 34… Bxf8 35. h4